working in progress
Metadados da versao em ingles
Neste relatório são disponibilizados os códigos e as bases de dados utilizados no artigo
Para verificar os códigos, clique em Code
Os chuncks dos códigos estão comentados com informações antecedidas por #
Um versão deste relatorio em formato de script (arquivo .Rmd) pode ser encontrada no github dos autores
Os dados podem acessados em suas fontes originais nos respectivos links fornecidos oportunamente
Carregar pacotes
Definir funções
# limpar enviroment
rm(list = ls())
# Not run - Particular settings to my computer
path <- "C:/r_files/my_academic_projects/capacidades_ingles"
setwd(path)
# Desabilitar notação científica
#options(scipen = 999)
# Instalar este pacote para quem não o tem
#install.packages("pacman")
pacman::p_load(tidyverse, readxl, janitor, sjPlot,
scales, DataExplorer, DT, gridExtra,
viridis, ggcorrplot, VGAM, geobr, ggthemes,
snakecase, abjutils, Hmisc)
# A funcao carrega dois objetos:
# 1 - O dicionário da Munic, contido na primeira aba do excel;
# 2 - Os dados das Munics contidos em todas as demais abas, unidos por "A1"
carregar_munics <- function(link_da_munic, ano){
# definir diretório e arquivo temporário
wd_origin <- getwd()
temp_dir <- tempdir()
setwd(temp_dir)
file.remove(list.files(path = temp_dir))
temp_file <- tempfile(tmpdir = temp_dir)
# Criar lista para armazenar arquivos
Munic_list <- list()
# Download da Munic
download.file(url = link_da_munic, destfile = temp_file)
# unizip
unzip(temp_file)
# remover temporário
file.remove(temp_file)
# selecionar o arquivo xls
file.xls <- list.files(pattern = "xls")
# Nome da Munic
Munic <- paste0("Munic_", ano)
# Carregar todas as abas do excel
Munic <- file.xls %>%
excel_sheets() %>%
set_names() %>%
map(read_excel, path = file.xls)
# remover arquivo xls da pasta temporaria
file.remove(file.xls)
# Nome do dicionario
dic <- paste0("dic_", ano)
# selecionar dicionário
dic <- Munic[[1]]
# excluir dicionário
Munic[[1]] <- NULL
# Mesclar todas as abas do excel
Munic <- Munic %>%
reduce(full_join, by = "A1")
# Criar nome da lista
list <- paste0("list_", Munic)
# criar lista com dicionário e Munic
list <- list(dic, Munic)
# retornar ao diretório orginal
setwd(wd_origin)
# retornar objeto list com dicionário e Munics
return(list)
}
########################################
# Munic 2014
Munic_14_link <- "ftp://ftp.ibge.gov.br/Perfil_Municipios/2014/base_MUNIC_xls_2014.zip"
Munic_14 <- carregar_munics(link_da_munic = Munic_14_link,
ano = 2014)
Munic_dic_14 <- Munic_14[[1]]
Munic_14 <- Munic_14[[2]]
########################################
# Munic 2013 - Suplementar
Munic_13sup_link <- "ftp://ftp.ibge.gov.br/Perfil_Municipios/Assistencia_Social_2013/xls/base_assist_social_2013.zip"
Munic_13 <- carregar_munics(link_da_munic = Munic_13sup_link,
ano = 2013)
Munic_dic_13 <- Munic_13[[1]]
Munic_13 <- Munic_13[[2]]
########################################
# Munic 2015
Munic_15_link <- "ftp://ftp.ibge.gov.br/Perfil_Municipios/2015/Base_de_Dados/Base_MUNIC_2015_xls.zip"
Munic_15 <- carregar_munics(link_da_munic = Munic_15_link,
ano = 2015)
Munic_dic_15 <- Munic_15[[1]]
Munic_15 <- Munic_15[[2]]
########################################
# Transparencia
# Os dados foram baixados do link abaixo e armazedos no meu github para faciltar o download aqui
#
# https://sig.mpf.mp.br/sig/servlet/mstrWeb?evt=3140&src=mstrWeb.3140&documentID=CD5BD3BA11E621B2E4D90080EFC54015&server=MSTRIS.PGR.MPF.MP.BR&Project=Ranking%20da%20Transparencia&port=0&share=1
transp <- rio::import("https://github.com/ronycoelho/databases/raw/master/D01_Ranking_da_Transpar%C3%AAncia_2016.xlsx", skip=2) %>%
janitor::clean_names()
########################################
# Pib
pib_link <- "ftp://ftp.ibge.gov.br/Pib_Municipios/2017/base/base_de_dados_2010_2017_xls.zip"
temp_dir <- tempdir()
#
temp_file2 <- tempfile(tmpdir = temp_dir)
# download
download.file(url = pib_link, destfile = temp_file2)
#unzip
unzip(temp_file2)
# selecionar arquivo
file.xls <- list.files(pattern = "xls")
pib <- rio::import(file.xls)
file.remove(file.xls)
########################################
# Carregar dados do idh-m
idhm_link <- "http://atlasbrasil.org.br/2013/data/rawData/atlas2013_dadosbrutos_pt.xlsx"
idhm <- rio::import(idhm_link, sheet = 2)
#idhm <- read_excel("idh_census.xlsx", sheet = 2)
########################################
# Shape files dos mapas
shape_mun <- geobr::read_municipality(simplified = T, showProgress = F)
shape_estado <- geobr::read_state(simplified = T, showProgress = F)
# Uso particular para configurações no computador pessoal
#getwd()
#save.image("capacities_raw_data.RData")
load("capacities_raw_data.RData")
###########################################
# Munic 2014 - saúde e educação
index_14 <- c("A263", # Conselho de Saúde
"A273", # Plano de Saúde
"A207", # Conselho de Educação
"A203")
Munic_14_sel <- Munic_14 %>%
dplyr::select(A1,
A1022:A1029, # região, população e porte
index_14)
###########################################
#Munic 2013 - Assistência Social
index_13 <- c("A1",
"A199", # Conselho da Assit. Social
"A149") # Plano da Assist. Social
Munic_13_sel <- Munic_13 %>%
dplyr::select(index_13)
###########################################
# Munic 2015 - consórcios
index_15 <- c("A1",
"A151", # Consórcio de Educação
"A155", # Consórcio de Saúde
"A159") # Consórcio de Assist. Social
Munic_15_sel <- Munic_15 %>%
dplyr::select(index_15)
###########################################
# Transparencia - 2016
transp <- transp %>%
select(uf, municipio, nota = nota_2ª_avaliacao) %>%
mutate(municipio = to_snake_case(rm_accent(municipio)),
uf = str_to_lower(uf),
manual_id = paste0(uf, "_",municipio)) %>%
# Excluir estado
filter(municipio != "estado")
###########################################
# PIB - 2014
pib <- clean_names(pib)
pib_sel <- pib %>% #glimpse()
filter(ano == 2014) %>%
select(A1 = codigo_do_municipio, ano,
pib_total = produto_interno_bruto_a_precos_correntes_r_1_000,
pib_per_cap = produto_interno_bruto_per_capita_a_precos_correntes_r_1_00)%>%
arrange(desc(pib_per_cap))
###########################################
# IDH-M - 2010
idhm_sel <- idhm %>%
dplyr::select(A1 = Codmun7, ANO, IDHM) %>%
filter(ANO==2010) %>%
arrange(desc(IDHM))
# garantir que os códigos possuem a mesma classficação
Munic_13_sel$A1 <- as.character(Munic_13_sel$A1)
Munic_15_sel$A1 <- as.character(Munic_15_sel$A1)
Munic_14_sel$A1 <- as.character(Munic_14_sel$A1)
pib_sel$A1 <- as.character(pib_sel$A1)
idhm_sel$A1 <- as.character(idhm_sel$A1)
# Criar variavel com código de 6 digitos para mesclar com a Munic 2013 e mesclar todas as Munics
capacities <- Munic_14_sel %>%
mutate(A1a = str_sub(A1,start = 1, end = 6)) %>%
select(A1, A1a, everything()) %>%
full_join(Munic_13_sel, by = c("A1a"="A1")) %>%
full_join(Munic_15_sel, by = c("A1"="A1"))
# Mesclar pib e idhm
capacities <- capacities %>%
full_join(pib_sel) %>%
full_join(idhm_sel)
# Criar variavel com manual_id para mesclar com a base da transparencia
capacities <- capacities %>%
mutate(mun_temp = to_snake_case(rm_accent(A1027)),
uf_temp = str_to_lower(A1026),
manual_id = paste0(uf_temp, "_",mun_temp)) %>%
select(-c(mun_temp, uf_temp))
capacities <- capacities %>%
inner_join(transp, by = "manual_id") %>%
relocate(nota, .after = A159) %>%
select(-c(municipio, uf, manual_id))
Clique na seta no topo à direita para ver todas as colunas
capacities %>% head(10)
capacities <- capacities %>%
select(cod_mun = A1,
nm_mun = A1027,
populacao = A1028,
faixa_pop = A1029,
cod_est = A1022,
nm_est = A1025,
sg_est = A1026,
regiao = A1024,
# Planos
pl_sa = A273,
pl_as = A149,
pl_ed = A203,
# Conselhos
chl_sa = A263,
chl_as = A199,
chl_ed = A207,
# Consórciso
consor_ed = A151,
consor_sa = A155,
consor_as = A159,
# Transparencia
nota_transp = nota,
#
pib_total = pib_total,
pib_per_cap = pib_per_cap,
ano_pib = ano,
idhm = IDHM,
ano_idhm = ANO)
# Verificar existência de NAs
DataExplorer::plot_missing(capacities)
# NA - presentes apenas nos IDHM
# Excluir NA's
# Removidos 5 municipios não criados até 2010, portante sem o IDHM para esse ano.
capacities <- capacities %>%
drop_na(idhm)
DataExplorer::plot_missing(capacities)
Podem ser baixados em formato excel ou csv
Legenda:
capacities %>%
datatable(extensions = 'Buttons',
rownames = F,
options = list(dom = 'Blfrtip',
buttons = c('csv', 'excel'),
autoFill = TRUE,
fixedHeader = TRUE,
autowidth = TRUE,
paging = F,
scrollX = TRUE,
scrollY = "400px"))
# Uso particular para configurações de uso pessoal
#setwd("C:/r_files/my_academic_projects/capacidades/capacitties")
#save.image("capacities_1_raw.RData")
#load("capacities_1_raw.RData")
De 5.272 mun. com dados disponiveis passamos a trabalhar com 5.236 (99%, ou 94% dos 5.570)
capacities_2 <- capacities %>%
filter(populacao <= 500000) %>%
mutate(faixa_pop = faixa_pop %>% as.factor())
# capacities_2 %>%
# mutate(regiao = str_remove(regiao, "\\d - "),
# ) %>% glimpse()
Visualizar:
Quantidades em cada categoria
Estatisticas descritivas básicas
Faixa de população por região
# unique(capacities_2$faixa_pop)
summary(capacities_2$faixa_pop) %>% knitr::kable()
| x | |
|---|---|
| 1 - Até 5000 | 1166 |
| 2 - 5001 até 10000 | 1122 |
| 3 - 10001 até 20000 | 1310 |
| 4 - 20001 até 50000 | 1039 |
| 5 - 50001 até 100000 | 341 |
| 6 - 100001 até 500000 | 258 |
summary(capacities_2$populacao)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 822 5450 11570 26551 24878 499776
sjt.xtab(capacities_2$faixa_pop,
capacities_2$regiao,
encoding = "windows",
show.summary = F,
show.row.prc = T,
show.col.prc = T,
title = "Faixa de população por região")
| faixa_pop | regiao | Total | ||||
|---|---|---|---|---|---|---|
| 1 - Norte | 2 - Nordeste | 3 - Sudeste | 4 - Sul | 5 - Centro-Oeste | ||
| 1 - Até 5000 |
73 6.3 % 17.3 % |
194 16.6 % 12 % |
351 30.1 % 22.2 % |
413 35.4 % 35.5 % |
135 11.6 % 29.9 % |
1166 100 % 22.3 % |
| 2 - 5001 até 10000 |
78 7 % 18.4 % |
305 27.2 % 18.9 % |
360 32.1 % 22.8 % |
271 24.2 % 23.3 % |
108 9.6 % 23.9 % |
1122 100 % 21.4 % |
| 3 - 10001 até 20000 |
101 7.7 % 23.9 % |
529 40.4 % 32.7 % |
353 26.9 % 22.4 % |
227 17.3 % 19.5 % |
100 7.6 % 22.1 % |
1310 100 % 25 % |
| 4 - 20001 até 50000 |
104 10 % 24.6 % |
420 40.4 % 26 % |
285 27.4 % 18.1 % |
155 14.9 % 13.3 % |
75 7.2 % 16.6 % |
1039 100 % 19.8 % |
| 5 - 50001 até 100000 |
43 12.6 % 10.2 % |
120 35.2 % 7.4 % |
106 31.1 % 6.7 % |
52 15.2 % 4.5 % |
20 5.9 % 4.4 % |
341 100 % 6.5 % |
|
6 - 100001 até 500000 |
24 9.3 % 5.7 % |
50 19.4 % 3.1 % |
123 47.7 % 7.8 % |
47 18.2 % 4 % |
14 5.4 % 3.1 % |
258 100 % 4.9 % |
| Total |
423 8.1 % 100 % |
1618 30.9 % 100 % |
1578 30.1 % 100 % |
1165 22.2 % 100 % |
452 8.6 % 100 % |
5236 100 % 100 % |
character_col <- c("cod_mun", "nm_mun")
factor_col <- c("cod_est", "nm_est", "sg_est","faixa_pop", "regiao", "pl_sa", "pl_as", "pl_ed", "chl_sa", "chl_as", "chl_ed", "consor_ed", "consor_ed", "consor_sa", "consor_as")
numeric_col <- c("populacao", "pib_total", "pib_per_cap", "ano_pib", "idhm","ano_idhm", "nota_transp")
capacities_2 <- capacities_2 %>%
mutate_at(character_col, as.character) %>%
mutate_at(factor_col, as.factor) %>%
mutate_at(numeric_col, as.numeric)
Numéricas: são apresentados valores mínimos, máximos, média, mediana etc.
Categóricas: categorias e quantidade de cada uma
Character: quantidade de valores únicos
summary(capacities_2)
## cod_mun nm_mun populacao
## Length:5236 Length:5236 Min. : 822
## Class :character Class :character 1st Qu.: 5450
## Mode :character Mode :character Median : 11570
## Mean : 26551
## 3rd Qu.: 24878
## Max. :499776
##
## faixa_pop cod_est nm_est
## 1 - Até 5000 :1166 31 : 788 Minas Gerais : 788
## 2 - 5001 até 10000 :1122 35 : 626 São Paulo : 626
## 3 - 10001 até 20000 :1310 43 : 484 Rio Grande do Sul: 484
## 4 - 20001 até 50000 :1039 29 : 405 Bahia : 405
## 5 - 50001 até 100000 : 341 41 : 394 Paraná : 394
## 6 - 100001 até 500000: 258 42 : 287 Santa Catarina : 287
## (Other):2252 (Other) :2252
## sg_est regiao pl_sa pl_as
## MG : 788 1 - Norte : 423 Não : 125 Não : 311
## SP : 626 2 - Nordeste :1618 Não informado: 3 Recusa: 1
## RS : 484 3 - Sudeste :1578 Recusa : 1 Sim :4924
## BA : 405 4 - Sul :1165 Sim :5107
## PR : 394 5 - Centro-Oeste: 452
## SC : 287
## (Other):2252
## pl_ed chl_sa chl_as chl_ed
## Não :2912 Não : 11 Não : 6 Não : 617
## Não informado: 3 Não informado: 2 Recusa: 1 Não informado: 3
## Recusa : 1 Recusa : 1 Sim :5229 Recusa : 1
## Sim :2320 Sim :5222 Sim :4615
##
##
##
## consor_ed consor_sa consor_as
## Não :4888 Não :2557 Não :4800
## Não informado: 1 Não informado: 1 Não informado: 1
## Recusa : 1 Recusa : 1 Recusa : 1
## Sim : 346 Sim :2677 Sim : 434
##
##
##
## nota_transp pib_total pib_per_cap ano_pib
## Min. : 0.000 Min. : 11715 Min. : 3082 Min. :2014
## 1st Qu.: 3.400 1st Qu.: 67344 1st Qu.: 8102 1st Qu.:2014
## Median : 5.600 Median : 147980 Median : 14240 Median :2014
## Mean : 5.332 Mean : 639404 Mean : 18941 Mean :2014
## 3rd Qu.: 7.500 3rd Qu.: 385900 3rd Qu.: 22980 3rd Qu.:2014
## Max. :10.000 Max. :58004933 Max. :815698 Max. :2014
##
## idhm ano_idhm
## Min. :0.4180 Min. :2010
## 1st Qu.:0.6030 1st Qu.:2010
## Median :0.6690 Median :2010
## Mean :0.6613 Mean :2010
## 3rd Qu.:0.7180 3rd Qu.:2010
## Max. :0.8620 Max. :2010
##
1 mun. (cod_mun: 2102150) recusou-se a responder todas as questões
1 mun. (cod_mun: 2102150) recusou-se a responder questões sobre consórcios
Outros casos serão excluídos oportunamente
# capacities_2[which(capacities_2$sc_as == "Recusa"), ]
# capacities_2[which(capacities_2$sc_ed == "Recusa"), ]
# capacities_2[which(capacities_2$consor_ed == "Recusa"), ]
# # A tibble: 1 x 31
# cod_mun nm_mun populacao faixa_pop cod_est nm_est sg_est regiao sc_sa sc_as sc_ed pl_sa pl_as pl_ed
# <chr> <chr> <dbl> <fct> <fct> <fct> <fct> <fct> <fct> <fct> <fct> <fct> <fct> <fct>
# 1 2102150 Brejo~ 4291 1 - Até ~ 21 Maran~ MA 2 - N~ Recu~ Recu~ Recu~ Recu~ Recu~ Recu~
# Filtrar 1 municipio com recusa para todas as variáveis
capacities_2 <- filter(capacities_2, cod_mun != 2102150)
capacities_2 <- filter(capacities_2, cod_mun != 4110508)
# 4.915 casos
#Verificar quantiles
quant_idhm <- quantile(capacities_2$idhm, c(.2,.4,.6,.8,1))
# 20% 40% 60% 80% 100%
# 0.592 0.643 0.691 0.728 0.862
quant_idhm %>% knitr::kable()
| x | |
|---|---|
| 20% | 0.592 |
| 40% | 0.643 |
| 60% | 0.691 |
| 80% | 0.728 |
| 100% | 0.862 |
capacities_2 <- capacities_2 %>%
mutate(idhm_quintil = case_when(
idhm <= 0.592 ~ "primeiro",
idhm >= 0.593 & idhm <= 0.643 ~ "segundo",
idhm >= 0.644 & idhm <= 0.691 ~ "terceiro",
idhm >= 0.692 & idhm <= 0.728 ~ "quarto",
idhm >= 0.729 ~ "quinto"))
levels_idhm = c("primeiro", "segundo", "terceiro", "quarto", "quinto")
capacities_2$idhm_quintil <- ordered(capacities_2$idhm_quintil,
levels = c(levels_idhm))
capacities_2 %>%
count(idhm_quintil, name = "qtdd_de_mun") %>%
mutate(perc = round(qtdd_de_mun/sum(qtdd_de_mun)*100, 1))
map_idhm <- capacities_2 %>%
select(cod_mun, idhm)
map_idhm <- shape_mun %>%
mutate(code_muni = as.character(code_muni)) %>%
full_join(map_idhm, by= c("code_muni" = "cod_mun"))
#####################
theme_map_manual <- function(){
theme(legend.position = c(0.2, 0.5),
legend.justification = c("right", "top"),
legend.background = element_blank(),
plot.title=element_text( hjust=0, vjust=-5, face='bold', size = 12))
}
ggplot(map_idhm)+
geom_sf(aes(fill=idhm), colour = alpha("white", .2), lty=3)+
scale_fill_viridis_c("HDI-M", option = "inferno")+
geom_sf(data=shape_estado, fill=NA, color = "black")+
ggthemes::theme_map()+
theme_map_manual()+
ggtitle("IDHM")
Transformar todas as respostas “Sim” = 1;
Todas demais opções (“Não”, “Não informado”, etc) = 0
Depois, realizar a contagem de “Sim” e classificar
levels_cat <- c("Não possui" ,"Possui 1", "Possui 2","Possui 3")
# # Verificar lables
# unique(capacities_1$pl_as)
# unique(capacities_1$pl_sa)
# unique(capacities_1$pl_ed)
# Criar dummies 1 = Sim; 0 = Não.
capacities_2 <- capacities_2 %>%
mutate_at(vars(pl_sa:pl_ed), funs(ifelse(.== "Sim", 1, 0)))
# Contar quantidade de "sim" e organizar colunas
capacities_2 <- capacities_2 %>%
mutate(Num_pl = rowSums(select(.,pl_sa:pl_ed)))%>%
relocate(Num_pl, .after = pl_ed)
# Categorizar quantidade de planos planos
capacities_2 <- capacities_2 %>%
mutate(Pl_class = case_when(
Num_pl == 3 ~ "Possui 3",
Num_pl == 2 ~ "Possui 2",
Num_pl == 1 ~ "Possui 1",
Num_pl == 0 ~ "Não possui")) %>%
relocate(Pl_class, .after = Num_pl)
capacities_2$Pl_class <- ordered(capacities_2$Pl_class,
levels = c(levels_cat))
capacities_2 %>%
count(Pl_class, name = "quantidade") %>%
mutate(perc = round(quantidade/sum(quantidade)*100,1))
sjt.xtab(capacities_2$idhm_quintil,
capacities_2$Pl_class,
encoding = "windows",
show.summary = F,
show.row.prc = T,
show.col.prc = T,
title = "Norte")
| idhm_quintil | Pl_class | Total | |||
|---|---|---|---|---|---|
| Não possui | Possui 1 | Possui 2 | Possui 3 | ||
| primeiro |
1 0.1 % 14.3 % |
90 8.3 % 40 % |
692 63.8 % 24 % |
301 27.8 % 14.2 % |
1084 100 % 20.7 % |
| segundo |
3 0.3 % 42.9 % |
48 4.7 % 21.3 % |
606 59.1 % 21 % |
368 35.9 % 17.4 % |
1025 100 % 19.6 % |
| terceiro |
3 0.3 % 42.9 % |
37 3.5 % 16.4 % |
558 53.4 % 19.4 % |
447 42.8 % 21.1 % |
1045 100 % 20 % |
| quarto |
0 0 % 0 % |
27 2.6 % 12 % |
543 51.6 % 18.8 % |
483 45.9 % 22.8 % |
1053 100 % 20.1 % |
| quinto |
0 0 % 0 % |
23 2.2 % 10.2 % |
484 47.1 % 16.8 % |
520 50.6 % 24.5 % |
1027 100 % 19.6 % |
| Total |
7 0.1 % 100 % |
225 4.3 % 100 % |
2883 55.1 % 100 % |
2119 40.5 % 100 % |
5234 100 % 100 % |
# # Verificar lables
# unique(capacities_1$chl_as)
# unique(capacities_1$chl_sa)
# unique(capacities_1$chl_ed)
# Criar dummies 1 = Sim; 0 = Não.
capacities_2 <- capacities_2 %>%
mutate_at(vars(chl_sa:chl_ed), funs(ifelse(.== "Sim", 1, 0)))
# Contar quantidade de "sim" e organizar colunas
capacities_2 <- capacities_2 %>%
mutate(Num_chl = rowSums(select(.,chl_sa:chl_ed)))%>%
relocate(Num_chl, .after = chl_ed)
# Categorizar quantidade de planos planos
capacities_2 <- capacities_2 %>%
mutate(Chl_class = case_when(
Num_chl == 3 ~ "Possui 3",
Num_chl == 2 ~ "Possui 2",
Num_chl == 1 ~ "Possui 1",
Num_chl == 0 ~ "Não possui")) %>%
relocate(Chl_class, .after = Num_chl)
capacities_2$Chl_class <- ordered(capacities_2$Chl_class,
levels = c(levels_cat))
capacities_2 %>%
count(Chl_class, name = "quantidade") %>%
mutate(perc = round(quantidade/sum(quantidade)*100, 1))
sjt.xtab(capacities_2$idhm_quintil,
capacities_2$Chl_class,
encoding = "windows",
show.summary = F,
show.row.prc = T,
show.col.prc = T,
title = "Norte")
| idhm_quintil | Chl_class | Total | |||
|---|---|---|---|---|---|
| Não possui | Possui 1 | Possui 2 | Possui 3 | ||
| primeiro |
0 0 % 0 % |
1 0.1 % 12.5 % |
188 17.3 % 30.2 % |
895 82.6 % 19.4 % |
1084 100 % 20.7 % |
| segundo |
0 0 % 0 % |
2 0.2 % 25 % |
155 15.1 % 24.9 % |
868 84.7 % 18.9 % |
1025 100 % 19.6 % |
| terceiro |
0 0 % 0 % |
3 0.3 % 37.5 % |
144 13.8 % 23.1 % |
898 85.9 % 19.5 % |
1045 100 % 20 % |
| quarto |
0 0 % 0 % |
1 0.1 % 12.5 % |
97 9.2 % 15.6 % |
955 90.7 % 20.7 % |
1053 100 % 20.1 % |
| quinto |
0 0 % 0 % |
1 0.1 % 12.5 % |
39 3.8 % 6.3 % |
987 96.1 % 21.4 % |
1027 100 % 19.6 % |
| Total |
0 0 % 100 % |
8 0.2 % 100 % |
623 11.9 % 100 % |
4603 87.9 % 100 % |
5234 100 % 100 % |
# # Assistência Social
# levels(capacities_2$consor_as)
# levels(capacities_2$consor_ed)
# levels(capacities_2$consor_sa)
capacities_2 <- capacities_2 %>%
mutate_at(vars(consor_ed:consor_as),
funs(ifelse(.== "Sim", 1, 0)))
# Contar quantidade de "sim" e organizar colunas
capacities_2 <- capacities_2 %>%
mutate(Num_consor = rowSums(select(.,consor_ed:consor_as)))%>%
relocate(Num_consor, .after = consor_as)
# Categorizar quantidade de planos planos
capacities_2 <- capacities_2 %>%
mutate(Consor_class = case_when(
Num_consor == 3 ~ "Possui 3",
Num_consor == 2 ~ "Possui 2",
Num_consor == 1 ~ "Possui 1",
Num_consor == 0 ~ "Não possui")) %>%
relocate(Consor_class, .after = Num_consor)
capacities_2$Consor_class <- ordered(capacities_2$Consor_class,
levels = c(levels_cat))
capacities_2 %>%
count(Consor_class, name = "quantidade") %>%
mutate(perc = round(quantidade/sum(quantidade)*100,1))
sjt.xtab(capacities_2$idhm_quintil,
capacities_2$Consor_class,
encoding = "windows",
show.summary = F,
show.row.prc = T,
show.col.prc = T,
title = "Norte")
| idhm_quintil | Consor_class | Total | |||
|---|---|---|---|---|---|
| Não possui | Possui 1 | Possui 2 | Possui 3 | ||
| primeiro |
774 71.4 % 31.4 % |
211 19.5 % 9.1 % |
28 2.6 % 13.5 % |
71 6.5 % 29.7 % |
1084 100 % 20.7 % |
| segundo |
526 51.3 % 21.4 % |
424 41.4 % 18.2 % |
33 3.2 % 15.9 % |
42 4.1 % 17.6 % |
1025 100 % 19.6 % |
| terceiro |
383 36.7 % 15.6 % |
581 55.6 % 25 % |
51 4.9 % 24.6 % |
30 2.9 % 12.6 % |
1045 100 % 20 % |
| quarto |
354 33.6 % 14.4 % |
603 57.3 % 25.9 % |
57 5.4 % 27.5 % |
39 3.7 % 16.3 % |
1053 100 % 20.1 % |
| quinto |
425 41.4 % 17.3 % |
507 49.4 % 21.8 % |
38 3.7 % 18.4 % |
57 5.6 % 23.8 % |
1027 100 % 19.6 % |
| Total |
2462 47 % 100 % |
2326 44.4 % 100 % |
207 4 % 100 % |
239 4.6 % 100 % |
5234 100 % 100 % |
Criada para preservar os dados brutos em caso de reutilização
capacities_3 <- capacities_2
capacities_3 %>%
datatable(extensions = 'Buttons',
rownames = F,
options = list(dom = 'Blfrtip',
buttons = c('csv', 'excel'),
autoFill = TRUE,
fixedHeader = TRUE,
autowidth = TRUE,
paging = F,
scrollX = TRUE,
scrollY = "400px"))
# Verificar existência de NAs
DataExplorer::plot_missing(capacities_3)
# Uso particular para configurações
# setwd("C:/r_files/my_academic_projects/capacidades/capacitties")
#save.image("capacities_raw_data_3.RData")
#load("capacities_raw_data_3.RData")
capacities_4 <- capacities_3
cap_cor <- capacities_4 %>%
select(pib_per_cap, idhm, starts_with("Num"), nota_transp, populacao)
corr <- round(cor(cap_cor), 2)
ggcorrplot(corr, hc.order = F,
type = "lower",
show.diag = T,
pch = 12,
pch.cex = 12,
tl.cex = 12,
lab = TRUE,
lab_size = 3,
method="circle",
colors = c("tomato2", "white", "springgreen3"),
title="Correlação entre variáveis numéricas",
ggtheme=theme_bw)
vglm, do pacote ‘VGAM’, e os argumentos family = cumulative(parallel = T, reverse = T)No artigo foram reportadas apenas a coluna Estimate e o p-value
olr_brasil <- vglm(idhm_quintil ~ log(pib_per_cap) +
Num_pl + Num_chl + Num_consor + nota_transp,
data = capacities_4, family = cumulative(parallel = T, reverse = T))
summary(olr_brasil)
##
## Call:
## vglm(formula = idhm_quintil ~ log(pib_per_cap) + Num_pl + Num_chl +
## Num_consor + nota_transp, family = cumulative(parallel = T,
## reverse = T), data = capacities_4)
##
## Pearson residuals:
## Min 1Q Median 3Q Max
## logitlink(P[Y>=2]) -43.80 0.03139 0.10582 0.27232 4.769
## logitlink(P[Y>=3]) -34.89 -0.29389 0.10782 0.34114 55.185
## logitlink(P[Y>=4]) -370.44 -0.28485 -0.10509 0.36658 7.270
## logitlink(P[Y>=5]) -49.46 -0.33983 -0.09526 -0.04057 7.849
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept):1 -29.26644 0.59911 -48.850 < 2e-16 ***
## (Intercept):2 -30.95656 0.61420 -50.402 < 2e-16 ***
## (Intercept):3 -32.51208 0.63052 -51.564 < 2e-16 ***
## (Intercept):4 -34.15556 0.64611 -52.863 < 2e-16 ***
## log(pib_per_cap) 3.00697 0.05977 50.311 < 2e-16 ***
## Num_pl 0.30523 0.04735 6.447 1.14e-10 ***
## Num_chl 0.56459 0.08134 6.941 3.89e-12 ***
## Num_consor 0.19052 0.03552 5.364 8.15e-08 ***
## nota_transp 0.08806 0.01038 8.481 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Names of linear predictors: logitlink(P[Y>=2]), logitlink(P[Y>=3]),
## logitlink(P[Y>=4]), logitlink(P[Y>=5])
##
## Residual deviance: 12553.48 on 20927 degrees of freedom
##
## Log-likelihood: -6276.738 on 20927 degrees of freedom
##
## Number of Fisher scoring iterations: 7
##
## Warning: Hauck-Donner effect detected in the following estimate(s):
## 'log(pib_per_cap)'
##
##
## Exponentiated coefficients:
## log(pib_per_cap) Num_pl Num_chl Num_consor
## 20.226011 1.356931 1.758728 1.209883
## nota_transp
## 1.092059
norte <- capacities_4 %>%
filter(regiao == "1 - Norte")
olr_norte <- vglm(idhm_quintil ~ log(pib_per_cap) +
Num_pl + Num_chl + Num_consor + nota_transp,
data = norte, family = cumulative(parallel = T, reverse = T))
summary(olr_norte)
##
## Call:
## vglm(formula = idhm_quintil ~ log(pib_per_cap) + Num_pl + Num_chl +
## Num_consor + nota_transp, family = cumulative(parallel = T,
## reverse = T), data = norte)
##
## Pearson residuals:
## Min 1Q Median 3Q Max
## logitlink(P[Y>=2]) -9.340 -0.6557 0.21747 0.68174 2.820
## logitlink(P[Y>=3]) -7.412 -0.5635 -0.21875 0.33111 3.869
## logitlink(P[Y>=4]) -3.762 -0.2162 -0.10028 -0.06227 11.660
## logitlink(P[Y>=5]) -2.783 -0.0971 -0.05959 -0.03689 4.521
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept):1 -27.09159 2.33549 -11.600 <2e-16 ***
## (Intercept):2 -29.01928 2.38796 -12.152 <2e-16 ***
## (Intercept):3 -31.32731 2.46686 -12.699 <2e-16 ***
## (Intercept):4 -32.40591 2.51100 -12.906 <2e-16 ***
## log(pib_per_cap) 2.75557 0.24681 11.165 <2e-16 ***
## Num_pl 0.20443 0.16825 1.215 0.2243
## Num_chl 0.53748 0.21602 2.488 0.0128 *
## Num_consor -0.05875 0.14735 -0.399 0.6901
## nota_transp 0.04977 0.03563 1.397 0.1624
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Names of linear predictors: logitlink(P[Y>=2]), logitlink(P[Y>=3]),
## logitlink(P[Y>=4]), logitlink(P[Y>=5])
##
## Residual deviance: 902.647 on 1683 degrees of freedom
##
## Log-likelihood: -451.3235 on 1683 degrees of freedom
##
## Number of Fisher scoring iterations: 6
##
## Warning: Hauck-Donner effect detected in the following estimate(s):
## '(Intercept):1', '(Intercept):3'
##
##
## Exponentiated coefficients:
## log(pib_per_cap) Num_pl Num_chl Num_consor
## 15.7300643 1.2268312 1.7116833 0.9429448
## nota_transp
## 1.0510292
nordeste <- capacities_4 %>%
filter(regiao == "2 - Nordeste")
olr_nordeste <- vglm(idhm_quintil ~ log(pib_per_cap) +
Num_pl + Num_chl + Num_consor + nota_transp,
data = nordeste, family = cumulative(parallel = T, reverse = T))
summary(olr_nordeste)
##
## Call:
## vglm(formula = idhm_quintil ~ log(pib_per_cap) + Num_pl + Num_chl +
## Num_consor + nota_transp, family = cumulative(parallel = T,
## reverse = T), data = nordeste)
##
## Pearson residuals:
## Min 1Q Median 3Q Max
## logitlink(P[Y>=2]) -10.421 -0.77451 -0.46641 0.93483 3.062
## logitlink(P[Y>=3]) -5.681 -0.33778 -0.17466 -0.12464 5.333
## logitlink(P[Y>=4]) -1.396 -0.08197 -0.05972 -0.04565 17.767
## logitlink(P[Y>=5]) -2.595 -0.03640 -0.02651 -0.02030 13.393
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept):1 -21.99901 1.22276 -17.991 < 2e-16 ***
## (Intercept):2 -24.54938 1.26115 -19.466 < 2e-16 ***
## (Intercept):3 -26.63535 1.31052 -20.324 < 2e-16 ***
## (Intercept):4 -28.24505 1.37289 -20.573 < 2e-16 ***
## log(pib_per_cap) 2.15806 0.12471 17.304 < 2e-16 ***
## Num_pl 0.45680 0.09299 4.913 8.99e-07 ***
## Num_chl 0.38737 0.17700 2.189 0.0286 *
## Num_consor 0.07538 0.06665 1.131 0.2581
## nota_transp 0.07731 0.01959 3.947 7.90e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Names of linear predictors: logitlink(P[Y>=2]), logitlink(P[Y>=3]),
## logitlink(P[Y>=4]), logitlink(P[Y>=5])
##
## Residual deviance: 2800.062 on 6459 degrees of freedom
##
## Log-likelihood: -1400.031 on 6459 degrees of freedom
##
## Number of Fisher scoring iterations: 6
##
## Warning: Hauck-Donner effect detected in the following estimate(s):
## '(Intercept):2', '(Intercept):4'
##
##
## Exponentiated coefficients:
## log(pib_per_cap) Num_pl Num_chl Num_consor
## 8.654291 1.579018 1.473108 1.078293
## nota_transp
## 1.080382
centro_oeste <- capacities_4 %>%
filter(regiao == "5 - Centro-Oeste")
olr_centro_oeste <- vglm(idhm_quintil ~ log(pib_per_cap) +
Num_pl + Num_chl + Num_consor + nota_transp,
data = centro_oeste, family = cumulative(parallel = T, reverse = T))
summary(olr_centro_oeste)
##
## Call:
## vglm(formula = idhm_quintil ~ log(pib_per_cap) + Num_pl + Num_chl +
## Num_consor + nota_transp, family = cumulative(parallel = T,
## reverse = T), data = centro_oeste)
##
## Pearson residuals:
## Min 1Q Median 3Q Max
## logitlink(P[Y>=2]) -14.735 0.04734 0.0650 0.08905 0.512
## logitlink(P[Y>=3]) -5.971 0.12620 0.2105 0.37207 1.271
## logitlink(P[Y>=4]) -3.770 -0.87727 0.1999 0.87472 3.894
## logitlink(P[Y>=5]) -1.281 -0.44309 -0.2286 -0.13645 4.213
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept):1 -11.41377 1.85820 -6.142 8.13e-10 ***
## (Intercept):2 -13.75266 1.82499 -7.536 4.86e-14 ***
## (Intercept):3 -16.30317 1.86149 -8.758 < 2e-16 ***
## (Intercept):4 -18.55585 1.91209 -9.704 < 2e-16 ***
## log(pib_per_cap) 1.39548 0.17544 7.954 1.81e-15 ***
## Num_pl 0.36194 0.17277 2.095 0.03618 *
## Num_chl 0.66199 0.21066 3.142 0.00168 **
## Num_consor -0.63411 0.12143 -5.222 1.77e-07 ***
## nota_transp 0.01938 0.03535 0.548 0.58356
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Names of linear predictors: logitlink(P[Y>=2]), logitlink(P[Y>=3]),
## logitlink(P[Y>=4]), logitlink(P[Y>=5])
##
## Residual deviance: 1032.208 on 1799 degrees of freedom
##
## Log-likelihood: -516.104 on 1799 degrees of freedom
##
## Number of Fisher scoring iterations: 5
##
## No Hauck-Donner effect found in any of the estimates
##
##
## Exponentiated coefficients:
## log(pib_per_cap) Num_pl Num_chl Num_consor
## 4.0368938 1.4361059 1.9386415 0.5304061
## nota_transp
## 1.0195656
sudeste <- capacities_4 %>%
filter(regiao == "3 - Sudeste")
olr_sudeste <- vglm(idhm_quintil ~ log(pib_per_cap) +
Num_pl + Num_chl + Num_consor + nota_transp,
data = sudeste, family = cumulative(parallel = T, reverse = T))
summary(olr_sudeste)
##
## Call:
## vglm(formula = idhm_quintil ~ log(pib_per_cap) + Num_pl + Num_chl +
## Num_consor + nota_transp, family = cumulative(parallel = T,
## reverse = T), data = sudeste)
##
## Pearson residuals:
## Min 1Q Median 3Q Max
## logitlink(P[Y>=2]) -8.952 0.03505 0.06375 0.1236 0.9199
## logitlink(P[Y>=3]) -6.660 0.06125 0.13717 0.3638 17.4410
## logitlink(P[Y>=4]) -165.294 -0.44732 0.17813 0.5577 3.8795
## logitlink(P[Y>=5]) -28.260 -0.58595 -0.16078 0.5330 5.6784
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept):1 -22.49084 1.15799 -19.422 < 2e-16 ***
## (Intercept):2 -24.71880 1.16934 -21.139 < 2e-16 ***
## (Intercept):3 -26.67180 1.19638 -22.294 < 2e-16 ***
## (Intercept):4 -28.42746 1.22186 -23.266 < 2e-16 ***
## log(pib_per_cap) 2.46284 0.11500 21.416 < 2e-16 ***
## Num_pl 0.04993 0.08723 0.572 0.567
## Num_chl 0.92412 0.17608 5.248 1.54e-07 ***
## Num_consor -0.48784 0.07837 -6.225 4.83e-10 ***
## nota_transp 0.17070 0.02076 8.223 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Names of linear predictors: logitlink(P[Y>=2]), logitlink(P[Y>=3]),
## logitlink(P[Y>=4]), logitlink(P[Y>=5])
##
## Residual deviance: 3573.381 on 6303 degrees of freedom
##
## Log-likelihood: -1786.691 on 6303 degrees of freedom
##
## Number of Fisher scoring iterations: 7
##
## Warning: Hauck-Donner effect detected in the following estimate(s):
## 'log(pib_per_cap)'
##
##
## Exponentiated coefficients:
## log(pib_per_cap) Num_pl Num_chl Num_consor
## 11.7381161 1.0511938 2.5196503 0.6139533
## nota_transp
## 1.1861390
sul <- capacities_4 %>%
filter(regiao == "4 - Sul")
olr_sul <- vglm(idhm_quintil ~ log(pib_per_cap) +
Num_pl + Num_chl + Num_consor + nota_transp,
data = sul, family = cumulative(parallel = T, reverse = T))
summary(olr_sul)
##
## Call:
## vglm(formula = idhm_quintil ~ log(pib_per_cap) + Num_pl + Num_chl +
## Num_consor + nota_transp, family = cumulative(parallel = T,
## reverse = T), data = sul)
##
## Pearson residuals:
## Min 1Q Median 3Q Max
## logitlink(P[Y>=2]) -8.814 0.02784 0.04153 0.0599 0.9119
## logitlink(P[Y>=3]) -16.295 0.08605 0.13305 0.2500 1.3001
## logitlink(P[Y>=4]) -13.279 -0.39321 0.25031 0.6572 2.3672
## logitlink(P[Y>=5]) -5.628 -0.72703 -0.27732 0.7930 4.3159
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept):1 -21.71768 1.79491 -12.100 < 2e-16 ***
## (Intercept):2 -24.40850 1.74764 -13.967 < 2e-16 ***
## (Intercept):3 -26.47942 1.76257 -15.023 < 2e-16 ***
## (Intercept):4 -28.28458 1.78813 -15.818 < 2e-16 ***
## log(pib_per_cap) 2.56114 0.17023 15.046 < 2e-16 ***
## Num_pl 0.06563 0.10329 0.635 0.525178
## Num_chl 0.33007 0.18739 1.761 0.078168 .
## Num_consor 0.08757 0.07953 1.101 0.270857
## nota_transp 0.08505 0.02572 3.307 0.000943 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Names of linear predictors: logitlink(P[Y>=2]), logitlink(P[Y>=3]),
## logitlink(P[Y>=4]), logitlink(P[Y>=5])
##
## Residual deviance: 2586.373 on 4647 degrees of freedom
##
## Log-likelihood: -1293.187 on 4647 degrees of freedom
##
## Number of Fisher scoring iterations: 5
##
## Warning: Hauck-Donner effect detected in the following estimate(s):
## 'log(pib_per_cap)'
##
##
## Exponentiated coefficients:
## log(pib_per_cap) Num_pl Num_chl Num_consor
## 12.950555 1.067829 1.391059 1.091516
## nota_transp
## 1.088769
# Uso particular para configurações
#setwd("C:/r_files/my_academic_projects/capacidades/capacitties")
#save.image("capacities_raw_data_4.RData")
#load("capacities_raw_data_4.RData")
# rm(list = setdiff(ls(), "map"))
#save.image("capacities_raw_data_shapes.RData")
#load("capacities_raw_data_shapes.RData")
map <- shape_mun %>%
select(cod_mun = code_muni, geom) %>%
mutate(cod_mun = as.character(cod_mun)) %>%
left_join(capacities_4)
# #save(map, "map.RData")
# theme_map_manual <- function(){
# theme(legend.position = c(0.35, 0.5),
# legend.justification = c("right", "top"),
# legend.background = element_blank(),
# plot.title=element_text( hjust=0, vjust=-5, face='bold', size = 12))
# }
levels_map <- levels(map$Pl_class)
# inverter ordem e incluir NA
levels_map <- c(levels_map[c(4:1)], NA)
# Definir cores
colors_map <- c(scales::viridis_pal(option = "D")(4), "gray40")
# Definir cor para cada level
names(colors_map) <- levels_map
# Definir rótulos
labels_map <- levels_map
labels_map[5] <- "Não analisados"
map_pl <- map %>%
ggplot()+
geom_sf(aes(fill = fct_infreq(Pl_class)), colour = alpha("white", .3), lty=3)+
scale_fill_manual(name = "Planos",
values = colors_map,
na.value = "gray40",
limits = levels_map,
breaks = levels_map,
labels = labels_map)+
geom_sf(data=shape_estado, fill=NA, color = "black")+
ggthemes::theme_map()+
theme_map_manual()+
ggtitle("Planos")
map_consor <- map %>%
ggplot()+
geom_sf(aes(fill = fct_infreq(Consor_class)), colour = alpha("white", .5), lty=3)+
scale_fill_manual(name = "Consorcios",
values = colors_map,
na.value = "gray40",
limits = levels_map,
breaks = levels_map,
labels = labels_map)+
geom_sf(data=shape_estado, fill=NA, color = "black")+
ggthemes::theme_map()+
theme_map_manual()+
ggtitle("Consorcios")
map_chl <- map %>%
ggplot()+
geom_sf(aes(fill = fct_infreq(Chl_class)),
colour = alpha("white", .3), lty=3)+
scale_fill_manual(name = "Conselhos",
values = colors_map,
na.value = "gray40",
limits = levels_map,
breaks = levels_map,
labels = labels_map)+
geom_sf(data=shape_estado, fill=NA, color = "black")+
ggthemes::theme_map()+
theme_map_manual()+
ggtitle("Conselhos")
map_transp <- map %>%
ggplot()+
geom_sf(aes(fill = nota_transp),
colour = alpha("white", .3), lty=3)+
scale_fill_viridis_c(name = "Transparencia",
option = "D", direction=-1)+
geom_sf(data=shape_estado, fill=NA, color = "black")+
ggthemes::theme_map()+
theme_map_manual()+
theme(legend.justification = c("center", "top"))+
ggtitle("Transparencia")
mapas <- gridExtra::grid.arrange(map_pl, map_chl, map_consor, map_transp, ncol = 2)
mapas
## TableGrob (2 x 2) "arrange": 4 grobs
## z cells name grob
## 1 1 (1-1,1-1) arrange gtable[layout]
## 2 2 (1-1,2-2) arrange gtable[layout]
## 3 3 (2-2,1-1) arrange gtable[layout]
## 4 4 (2-2,2-2) arrange gtable[layout]